Chapter 8 HMSC analysis

8.1 Load data

# Load the prepared project objects. Later chunks use `genome_tree`,
# `genome_metadata`, `genome_gifts` and `GIFT_db`, which are presumably
# restored here -- TODO confirm the contents of this file.
load("data/data.Rdata")
# Load the fitted model. Later chunks refer to it as `m`
# (assumes the .Rdata file stores the model under that name -- TODO confirm).
load("hmsc/fit_model1_250_10.Rdata")

8.2 Variance partitioning

# Partition the variance explained by the model `m` among its terms
varpart <- computeVariancePartitioning(m)

# Summarise the partitioning per term: mean and standard deviation of the
# values across all columns of `varpart$vals` (labelled "genome"), expressed
# as percentages. `group_by()` orders the rows by the factor levels below.
varpart$vals %>%
  as.data.frame() %>%
  rownames_to_column(var = "variable") %>%
  pivot_longer(-variable, names_to = "genome", values_to = "value") %>%
  mutate(
    variable = factor(
      variable,
      levels = rev(c("origin", "sex", "logseqdepth", "Random: location"))
    )
  ) %>%
  group_by(variable) %>%
  summarise(
    mean = mean(value) * 100,
    sd = sd(value) * 100
  ) %>%
  tt()
tinytable_v8j9mekiecd9bvx55i2c
variable mean sd
Random: location 37.900015 25.317903
logseqdepth 56.110626 25.796874
sex 4.937460 5.612719
origin 1.051899 1.282563
# Basal tree
varpart_tree <- genome_tree

# Varpart table: one row per (term, genome) pair, with genomes ordered to
# follow the tree tips (reversed) and terms ordered for the stacked bars
varpart_table <- varpart$vals %>%
  as.data.frame() %>%
  rownames_to_column(var = "variable") %>%
  pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
  mutate(
    genome = factor(genome, levels = rev(varpart_tree$tip.label)),
    variable = factor(
      variable,
      levels = rev(c("origin", "sex", "logseqdepth", "Random: location"))
    )
  )

# Phyla
# Download the EHI phylum colour table ONCE and attach it to the genome
# metadata, restricted to and ordered by the tree tips. Both objects below are
# derived from this single table (the file was previously fetched twice).
tip_phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  mutate(phylum = str_remove_all(phylum, "p__")) %>%
  right_join(genome_metadata, by = join_by(phylum == phylum)) %>%
  filter(genome %in% varpart_tree$tip.label) %>%
  arrange(match(genome, varpart_tree$tip.label))

# One-column data frame (row names = genomes) giving the phylum of each tip;
# this is the layout passed to gheatmap() further down
phylum_colors <- tip_phylum_colors %>%
  mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
  column_to_rownames(var = "genome") %>%
  select(phylum)

# Colour vector with one entry per phylum, sorted alphabetically by phylum
colors_alphabetic <- tip_phylum_colors %>%
  select(phylum, colors) %>%
  unique() %>%
  arrange(phylum) %>%
  select(colors) %>%
  pull()

# Basal ggtree
# force.ultrametric() only coerces the tree (see the note it prints); it is
# not a formal rate-smoothing method
varpart_tree <- varpart_tree %>%
  force.ultrametric(., method = "extend") %>%
  ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add the phylum colour strip next to the tree tips, then reset the fill
# scale so the variance bars can use a different colour profile
varpart_tree <- gheatmap(
  varpart_tree,
  phylum_colors,
  offset = -0.2,
  width = 0.1,
  colnames = FALSE
) +
  scale_fill_manual(values = colors_alphabetic) +
  labs(fill = "Phylum") +
  new_scale_fill()

# Add the variance-partitioning stacked barplot, one horizontal bar per genome
vertical_tree <- varpart_tree +
  scale_fill_manual(
    values = c("#506a96", "#cccccc", "#be3e2b", "#f6de6c")
  ) +
  geom_fruit(
    data = varpart_table,
    geom = geom_bar,
    mapping = aes(
      x = value,
      y = genome,
      fill = variable,
      group = variable
    ),
    pwidth = 2,
    offset = 0.05,
    width = 1,
    orientation = "y",
    stat = "identity"
  ) +
  labs(fill = "Variable")

vertical_tree

8.3 Posterior estimates

# Select desired support threshold: support values at or above `support` are
# labelled "Positive", those at or below `negsupport` are labelled "Negative"
support <- 0.9
negsupport <- 1 - support

# Basal tree
postestimates_tree <- genome_tree

# Posterior estimate table: rows = genomes, columns = model covariates,
# cells = "Positive" / "Neutral" / "Negative" according to the Beta support
post_beta <- getPostEstimate(hM = m, parName = "Beta")$support %>%
  as.data.frame() %>%
  mutate(variable = m$covNames) %>%
  pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
  mutate(genome = factor(genome, levels = rev(postestimates_tree$tip.label))) %>%
  mutate(value = case_when(
    value >= support ~ "Positive",
    value <= negsupport ~ "Negative",
    TRUE ~ "Neutral"
  )) %>%
  mutate(value = factor(value, levels = c("Positive", "Neutral", "Negative"))) %>%
  pivot_wider(names_from = variable, values_from = value) %>%
  #select(genome,sp_vulgaris,area_semi,area_urban,sp_vulgarisxarea_semi,sp_vulgarisxarea_urban,season_spring,season_winter,sp_vulgarisxseason_spring,sp_vulgarisxseason_winter) %>%
  column_to_rownames(var = "genome")

# Phyla
# Download the EHI phylum colour table ONCE and attach it to the genome
# metadata, restricted to and ordered by the tree tips. Both objects below are
# derived from this single table (the file was previously fetched twice).
tip_phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  mutate(phylum = str_remove_all(phylum, "p__")) %>%
  right_join(genome_metadata, by = join_by(phylum == phylum)) %>%
  filter(genome %in% postestimates_tree$tip.label) %>%
  arrange(match(genome, postestimates_tree$tip.label))

# One-column data frame (row names = genomes) giving the phylum of each tip;
# this is the layout passed to gheatmap() further down
phylum_colors <- tip_phylum_colors %>%
  mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
  column_to_rownames(var = "genome") %>%
  select(phylum)

# Colour vector with one entry per phylum, sorted alphabetically by phylum
colors_alphabetic <- tip_phylum_colors %>%
  select(phylum, colors) %>%
  unique() %>%
  arrange(phylum) %>%
  select(colors) %>%
  pull()

# Basal ggtree
# force.ultrametric() only coerces the tree (see the note it prints); it is
# not a formal rate-smoothing method
postestimates_tree <- postestimates_tree %>%
  force.ultrametric(., method = "extend") %>%
  ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add the phylum colour strip next to the tree tips, then reset the fill
# scale so the support heatmap can use a different colour profile
postestimates_tree <- gheatmap(
  postestimates_tree,
  phylum_colors,
  offset = -0.2,
  width = 0.1,
  colnames = FALSE
) +
  scale_fill_manual(values = colors_alphabetic) +
  labs(fill = "Phylum") +
  new_scale_fill()

# Add the heatmap of posterior support categories, one column per covariate
# with the covariate names rotated above the panel.
# NOTE(review): the three fill values are unnamed, so they are assigned in the
# order the scale sees the categories -- confirm each colour lands on the
# intended trend.
postestimates_tree <- gheatmap(
  postestimates_tree,
  post_beta,
  offset = 0,
  width = 0.5,
  colnames = TRUE,
  colnames_position = "top",
  colnames_angle = 90,
  colnames_offset_y = 1,
  hjust = 0
) +
  scale_fill_manual(values = c("#be3e2b", "#f4f4f4", "#b2b530")) +
  labs(fill = "Trend")

# Expand the top of the panel so the rotated column names are not clipped
postestimates_tree +
  vexpand(.25, 1)

8.4 Correlations

# Compute the residual correlation matrix
OmegaCor <- computeAssociations(m)

# Reference tree (for sorting genomes): keep only the tips modelled in `m`
genome_tree_subset <- keep.tip(genome_tree, tip = m$spNames)

# Co-occurrence matrix for the first random level returned above.
# Keep the mean correlation only where the posterior support is above
# `supportLevel` or below its complement; all other cells become 0.
supportLevel <- 0.95
toPlot <- (
  OmegaCor[[1]]$support > supportLevel |
    OmegaCor[[1]]$support < (1 - supportLevel)
) * OmegaCor[[1]]$mean

# Heatmap of the retained correlations, both axes ordered by the tree tips
matrix <- toPlot %>%
  as.data.frame() %>%
  rownames_to_column(var = "genome1") %>%
  pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
  mutate(
    genome1 = factor(genome1, levels = genome_tree_subset$tip.label),
    genome2 = factor(genome2, levels = genome_tree_subset$tip.label)
  ) %>%
  ggplot(aes(x = genome1, y = genome2, fill = cor)) +
  geom_tile() +
  scale_fill_gradient2(
    low = "#be3e2b",
    mid = "#f4f4f4",
    high = "#b2b530"
  ) +
  theme_void()

# Tree drawn from the same subset (force.ultrametric() prints a note)
htree <- ggtree(force.ultrametric(genome_tree_subset, method = "extend"))
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Tree placed beside the correlation heatmap in the composite figure
# (force.ultrametric() prints a note)
vtree <- ggtree(force.ultrametric(genome_tree_subset, method = "extend"))
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Create composite figure: an 11 x 12 layout in which the tree (grob 2) fills
# the first column and the heatmap (grob 1) fills the remaining 11 columns.
# `base::matrix` is spelled out because `matrix` is also the heatmap object.
grid.arrange(
  grobs = list(matrix, vtree),
  layout_matrix = base::matrix(
    rep(c(2, rep(1, 11)), times = 11),
    nrow = 11,
    byrow = TRUE
  )
)

8.5 Predict responses

# Select modelchain of interest
load("hmsc/fit_model1_250_10.Rdata")

# Labels given to the gradient rows, in the order they are cycled below
gradient <- c("domestic", "feral")
gradientlength <- length(gradient)

# Treatment-specific gradient predictions.
# predict() returns one prediction matrix per posterior draw; they are stacked
# row-wise and every row is tagged with its origin label.
pred <- constructGradient(
  m,
  focalVariable = "origin",
  non.focalVariables = list(logseqdepth = list(1), location = list(1))
) %>%
  predict(m, Gradient = ., expected = TRUE) %>%
  do.call(rbind, .) %>%
  as.data.frame() %>%
  # The number of repeats is derived from the stacked rows rather than a
  # hard-coded 1000, so the labelling follows the number of posterior draws.
  # A row count that is not a multiple of the gradient length still errors.
  mutate(origin = rep(gradient, times = n() / gradientlength)) %>%
  pivot_longer(!origin, names_to = "genome", values_to = "value")
# weights:  9 (4 variable)
initial  value 101.072331 
final  value 91.392443 
converged

8.5.0.1 Element level

# Element-level table derived from the genome annotations
elements_table <- as.data.frame(to.elements(genome_gifts, GIFT_db))

# One community-level element table per posterior draw.
# `row_id` numbers the predictions within each (origin, genome) pair, so
# splitting on it yields one small table per draw with one row per origin.
community_elements <- pred %>%
  group_by(origin, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(function(draw) {
    # genomes x origins matrix of exponentiated predictions, passed through
    # tss() (presumably total-sum scaling -- TODO confirm)
    scaled_abundance <- draw %>%
      select(-row_id) %>%
      column_to_rownames(var = "origin") %>%
      as.data.frame() %>%
      exp() %>%
      t() %>%
      tss()

    # Community-level values, with the origin moved back into a column
    to.community(elements_table, scaled_abundance, GIFT_db) %>%
      as.data.frame() %>%
      rownames_to_column(var = "origin")
  })

#' Slope of a linear regression of values on their position
#'
#' Regresses the flattened values of `x` on their index (1, 2, ...) and
#' returns the index coefficient. For two values this equals the second value
#' minus the first.
#'
#' @param x A vector or list of numeric values.
#' @return A named numeric vector of length one holding the slope.
calculate_slope <- function(x) {
  trend <- stats::lm(unlist(x) ~ seq_along(unlist(x)))
  stats::coef(trend)[2L]
}

# Posterior summary of the per-element slope across the origin levels.
# For every draw in `community_elements`, the slope of each trait across the
# origin rows is computed with calculate_slope(); the draws are then summarised
# per trait (mean, 10% and 90% quantiles, share of positive/negative draws).
element_predictions <- map_dfc(community_elements, function(mat) {
  mat %>%
    column_to_rownames(var = "origin") %>%
    t() %>%
    as.data.frame() %>%
    rowwise() %>%
    mutate(slope = calculate_slope(c_across(everything()))) %>%
    select(slope)
}) %>%
  t() %>%
  as.data.frame() %>%
  set_names(colnames(community_elements[[1]])[-1]) %>%
  rownames_to_column(var = "iteration") %>%
  pivot_longer(!iteration, names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
  summarise(
    mean = mean(value),
    p1 = quantile(value, probs = 0.1),
    p9 = quantile(value, probs = 0.9),
    # Divide by the actual number of draws instead of a hard-coded 1000, so
    # the support stays a proportion whatever the size of the posterior sample
    positive_support = sum(value > 0) / n(),
    negative_support = sum(value < 0) / n()
  ) %>%
  arrange(-positive_support)
# Positively associated: elements with a positive mean slope and at least
# 0.9 positive support, strongest support first
element_predictions %>%
  filter(mean > 0, positive_support >= 0.9) %>%
  arrange(desc(positive_support)) %>%
  tt()
tinytable_lgta5nwsnzl0cwsqthbw
trait mean p1 p9 positive_support negative_support
D0205 0.012598000 0.0023490224 0.023531763 0.948 0.052
D0906 0.003856608 0.0001844596 0.008372241 0.931 0.069
D0208 0.009860536 0.0017838550 0.017932200 0.922 0.078
D0504 0.004658815 0.0002366114 0.009902129 0.908 0.092
D0507 0.003955784 0.0001165261 0.007343063 0.908 0.092
B0103 0.008498845 0.0001308317 0.017178267 0.906 0.094
# Negatively associated: elements with a negative mean slope and at least
# 0.9 negative support, strongest support first
element_predictions %>%
  filter(mean < 0, negative_support >= 0.9) %>%
  arrange(desc(negative_support)) %>%
  tt()
tinytable_2qnsawc5opc0i50ip4xu
trait mean p1 p9 positive_support negative_support
D0801 -0.001652917 -0.002151535 -1.048160e-04 0.005 0.995
D0802 -0.001652917 -0.002151535 -1.048160e-04 0.005 0.995
D0517 -0.004601572 -0.007857518 -1.214754e-03 0.030 0.970
B0709 -0.002137926 -0.003711645 -5.770036e-04 0.035 0.965
B0302 -0.004889616 -0.010638724 -5.415953e-04 0.036 0.964
D0611 -0.004076752 -0.009381879 -2.137319e-04 0.042 0.958
D0903 -0.004076752 -0.009381879 -2.137319e-04 0.042 0.958
B0219 -0.004102791 -0.009531123 -2.138679e-04 0.043 0.957
D0601 -0.009419047 -0.017870105 -2.354803e-03 0.044 0.956
B0310 -0.012666099 -0.023473519 -2.643906e-03 0.046 0.954
D0817 -0.004961877 -0.010745331 -4.754330e-04 0.050 0.950
D0603 -0.001962825 -0.003870224 -3.352175e-04 0.052 0.948
D0807 -0.004206442 -0.008820872 -5.491684e-04 0.056 0.944
D0610 -0.003082764 -0.005046227 -8.629378e-04 0.057 0.943
B0804 -0.016028324 -0.029638644 -3.465492e-03 0.058 0.942
B0303 -0.011417557 -0.021341370 -1.732196e-03 0.070 0.930
B0603 -0.016022751 -0.032242536 -1.962945e-03 0.070 0.930
D0908 -0.015809551 -0.028824706 -2.833161e-03 0.071 0.929
B0214 -0.021197078 -0.039247507 -3.021418e-03 0.072 0.928
D0606 -0.005747534 -0.011465796 -5.709914e-04 0.074 0.926
D0508 -0.003289659 -0.007703875 -7.819120e-05 0.083 0.917
B0601 -0.008981230 -0.018091207 -6.178199e-04 0.086 0.914
B0401 -0.011562378 -0.022822641 -5.148959e-04 0.087 0.913
D0612 -0.001698286 -0.002927157 -9.510122e-05 0.087 0.913
B0309 -0.007734646 -0.015600834 -8.281195e-05 0.095 0.905
D0816 -0.005808299 -0.012158922 -2.032688e-04 0.096 0.904
B0204 -0.015135407 -0.032062082 -2.853902e-04 0.097 0.903
# Elements with >= 0.9 support for a positive slope, sorted by mean slope
positive <- element_predictions %>%
  filter(mean > 0, positive_support >= 0.9) %>%
  arrange(mean) %>%
  select(-negative_support) %>%
  rename(support = positive_support)

# Elements with >= 0.9 support for a negative slope, sorted by mean slope
negative <- element_predictions %>%
  filter(mean < 0, negative_support >= 0.9) %>%
  arrange(mean) %>%
  select(-positive_support) %>%
  rename(support = negative_support)

# Plot the mean slope with its 10%-90% interval for every supported element,
# coloured by the function it is annotated with in GIFT_db.
# NOTE(review): if GIFT_db holds several rows per Code_element, the join
# repeats traits and the points are overplotted -- confirm.
bind_rows(positive, negative) %>%
  left_join(GIFT_db, by = join_by(trait == Code_element)) %>%
  mutate(
    trait = factor(
      trait,
      levels = c(rev(positive$trait), rev(negative$trait))
    )
  ) %>%
  ggplot(
    aes(
      x = mean,
      y = fct_rev(trait),
      xmin = p1,
      xmax = p9,
      color = Function
    )
  ) +
  geom_point() +
  geom_errorbar() +
  xlim(-0.04, 0.04) +
  geom_vline(xintercept = 0) +
  scale_color_manual(
    values = c(
      "#debc14", "#440526", "#dc7c17", "#172742",
      "#debc14", "#440526", "#dc7c17", "#172742",
      "#357379", "#6c7e2c", "#d8dc69", "#774d35", "#db717d"
    )
  ) +
  theme_minimal() +
  labs(x = "Regression coefficient", y = "Functional trait")

8.5.0.2 Function level

# Function-level table derived from the element-level table
functions_table <- as.data.frame(to.functions(elements_table, GIFT_db))

# One community-level function table per posterior draw.
# `row_id` numbers the predictions within each (origin, genome) pair, so
# splitting on it yields one small table per draw with one row per origin.
community_functions <- pred %>%
  group_by(origin, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(function(draw) {
    # genomes x origins matrix of exponentiated predictions, passed through
    # tss() (presumably total-sum scaling -- TODO confirm)
    scaled_abundance <- draw %>%
      select(-row_id) %>%
      column_to_rownames(var = "origin") %>%
      as.data.frame() %>%
      exp() %>%
      t() %>%
      tss()

    # Community-level values, with the origin moved back into a column
    to.community(functions_table, scaled_abundance, GIFT_db) %>%
      as.data.frame() %>%
      rownames_to_column(var = "origin")
  })
#' Slope of a linear regression of values on their position
#'
#' Regresses the flattened values of `x` on their index (1, 2, ...) and
#' returns the index coefficient. For two values this equals the second value
#' minus the first.
#'
#' @param x A vector or list of numeric values.
#' @return A named numeric vector of length one holding the slope.
calculate_slope <- function(x) {
  trend <- stats::lm(unlist(x) ~ seq_along(unlist(x)))
  stats::coef(trend)[2L]
}

# Posterior summary of the per-function slope across the origin levels.
# For every draw in `community_functions`, the slope of each trait across the
# origin rows is computed with calculate_slope(); the draws are then summarised
# per trait (mean, 10% and 90% quantiles, share of positive/negative draws).
function_predictions <- map_dfc(community_functions, function(mat) {
  mat %>%
    column_to_rownames(var = "origin") %>%
    t() %>%
    as.data.frame() %>%
    rowwise() %>%
    mutate(slope = calculate_slope(c_across(everything()))) %>%
    select(slope)
}) %>%
  t() %>%
  as.data.frame() %>%
  set_names(colnames(community_functions[[1]])[-1]) %>%
  rownames_to_column(var = "iteration") %>%
  pivot_longer(!iteration, names_to = "trait", values_to = "value") %>%
  group_by(trait) %>%
  summarise(
    mean = mean(value),
    p1 = quantile(value, probs = 0.1),
    p9 = quantile(value, probs = 0.9),
    # Divide by the actual number of draws instead of a hard-coded 1000, so
    # the support stays a proportion whatever the size of the posterior sample
    positive_support = sum(value > 0) / n(),
    negative_support = sum(value < 0) / n()
  ) %>%
  arrange(-positive_support)

# Positively associated: functions with a positive mean slope, ordered from
# the highest to the lowest positive support (no support cut-off here)
function_predictions %>%
  filter(mean > 0) %>%
  arrange(desc(positive_support)) %>%
  tt()
tinytable_6aqthmxo1jibuhcrsdva
trait mean p1 p9 positive_support negative_support
D02 8.297381e-03 -0.0034767283 0.0207677742 0.811 0.189
B08 7.845217e-03 -0.0031696928 0.0176622281 0.789 0.211
B01 1.081773e-03 -0.0065012399 0.0081741420 0.616 0.384
S01 8.592019e-04 -0.0130338151 0.0137154306 0.573 0.427
B10 2.004382e-06 -0.0002987702 0.0002626824 0.497 0.503
B09 3.708401e-05 -0.0005547998 0.0005152844 0.367 0.633
# Negatively associated: functions with a negative mean slope, ordered from
# the highest to the lowest negative support (no support cut-off here)
function_predictions %>%
  filter(mean < 0) %>%
  arrange(desc(negative_support)) %>%
  tt()
tinytable_ogjomrgc1iaher14oi2w
trait mean p1 p9 positive_support negative_support
D08 -0.0011505608 -0.002205838 -0.0001933344 0.043 0.957
B03 -0.0105800233 -0.018027872 -0.0024996566 0.061 0.939
D06 -0.0031587249 -0.006985716 0.0001070787 0.114 0.886
B04 -0.0080202588 -0.017941396 0.0014229755 0.152 0.848
D07 -0.0121782070 -0.028597144 0.0039737568 0.175 0.825
B06 -0.0066092580 -0.016952017 0.0030489265 0.189 0.811
D05 -0.0015695446 -0.007413673 0.0041523303 0.218 0.782
D03 -0.0041713761 -0.012777500 0.0033036602 0.230 0.770
S03 -0.0092505734 -0.031613460 0.0170041249 0.256 0.744
B02 -0.0031647641 -0.012342889 0.0051270290 0.284 0.716
D09 -0.0017619642 -0.007887604 0.0053050996 0.301 0.699
S02 -0.0043281804 -0.014879874 0.0035011445 0.343 0.657
B07 -0.0034669779 -0.015505790 0.0087514205 0.344 0.656
D01 -0.0001653947 -0.005040160 0.0048526834 0.435 0.565
# Functions with >= 0.9 support for a positive slope, sorted by mean slope
positive <- function_predictions %>%
  filter(mean > 0, positive_support >= 0.9) %>%
  arrange(mean) %>%
  select(-negative_support) %>%
  rename(support = positive_support)

# Functions with >= 0.9 support for a negative slope, sorted by mean slope
negative <- function_predictions %>%
  filter(mean < 0, negative_support >= 0.9) %>%
  arrange(mean) %>%
  select(-positive_support) %>%
  rename(support = negative_support)

# Plot the mean slope with its 10%-90% interval for every supported function,
# coloured by the function name annotated in GIFT_db.
# NOTE(review): if GIFT_db holds several rows per Code_function, the join
# repeats traits and the points are overplotted -- confirm.
bind_rows(positive, negative) %>%
  left_join(GIFT_db, by = join_by(trait == Code_function)) %>%
  mutate(
    trait = factor(
      trait,
      levels = c(rev(positive$trait), rev(negative$trait))
    )
  ) %>%
  ggplot(
    aes(
      x = mean,
      y = fct_rev(trait),
      xmin = p1,
      xmax = p9,
      color = Function
    )
  ) +
  geom_point() +
  geom_errorbar() +
  xlim(-0.02, 0.02) +
  geom_vline(xintercept = 0) +
  scale_color_manual(
    values = c(
      "#debc14", "#440526", "#dc7c17", "#172742",
      "#debc14", "#440526", "#dc7c17", "#172742",
      "#357379", "#6c7e2c", "#d8dc69", "#774d35", "#db717d"
    )
  ) +
  theme_minimal() +
  labs(x = "Regression coefficient", y = "Functional trait")